# -*- coding: utf-8 -*-
import scrapy


class TokenInfoSpider(scrapy.Spider):
    """Crawl the etherscan.io token listing and yield each token's name and URL.

    Crawling starts at ``https://etherscan.io/tokens`` and follows the
    anchor whose text is "Next" until that anchor is missing or its href
    is the ``#`` placeholder.
    """

    name = 'token_info'
    allowed_domains = ['etherscan.io']
    start_urls = ['https://etherscan.io/tokens']

    def parse(self, response):
        """Extract token entries from one listing page and schedule the next page.

        Args:
            response: The Scrapy response for a token listing page.

        Yields:
            dict: ``token_name`` (link text) and ``token_url`` (absolute URL)
                for every table row that contains a token link.
            scrapy.Request: A request for the next listing page, handled by
                this same callback, when a usable "Next" link exists.
        """
        # Group by table row; each row is presumably one token entry.
        for row in response.xpath('//tbody//tr'):
            href = row.xpath('./td[3]/h5/a/@href').extract_first()
            if href is None:
                # Rows without a token link (e.g. filler/ad rows) carry no
                # data; concatenating None would raise TypeError and abort
                # the whole page, so skip them instead.
                continue
            yield {
                'token_name': row.xpath('./td[3]/h5/a/text()').extract_first(),
                # urljoin resolves both relative and root-relative hrefs
                # against the page URL without producing doubled slashes.
                'token_url': response.urljoin(href),
            }

        # Resolve the next-page address; evaluate the XPath only once.
        next_href = response.xpath('//a[text()="Next"]/@href').extract_first()
        # A missing/empty href or the '#' placeholder marks the last page.
        if next_href and next_href != '#':
            yield scrapy.Request(response.urljoin(next_href), callback=self.parse)
